library(tidyverse)
library(sf)
library(raster)
library(knitr)
library(kableExtra)
library(tidycensus)
library(tigris)
library(FNN)
#library(QuantPsyc) # JE Note: in R 4.1, QuantPsyc package not available.
library(caret)
library(yardstick)
library(pscl)
library(plotROC) 
library(ggrepel)
library(pROC)
library(grid)
library(gridExtra)
library(viridis)
library(igraph)

plotTheme <- theme(
  plot.title =element_text(size=12),
  plot.subtitle = element_text(size=8),
  plot.caption = element_text(size = 6),
  axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
  axis.text.y = element_text(size = 10),
  axis.title.y = element_text(size = 10),
  # Set the entire chart region to blank
  panel.background=element_blank(),
  plot.background=element_blank(),
  #panel.border=element_rect(colour="#F0F0F0"),
  # Format the grid
  panel.grid.major=element_line(colour="#D0D0D0",size=.75),
  axis.ticks=element_blank())

mapTheme <- theme(plot.title =element_text(size=12),
                  plot.subtitle = element_text(size=8),
                  plot.caption = element_text(size = 6),
                  axis.line=element_blank(),
                  axis.text.x=element_blank(),
                  axis.text.y=element_blank(),
                  axis.ticks=element_blank(),
                  axis.title.x=element_blank(),
                  axis.title.y=element_blank(),
                  panel.background=element_blank(),
                  panel.border=element_blank(),
                  panel.grid.major=element_line(colour = 'transparent'),
                  panel.grid.minor=element_blank(),
                  legend.direction = "vertical", 
                  legend.position = "right",
                  plot.margin = margin(1, 1, 1, 1, 'cm'),
                  legend.key.height = unit(1, "cm"), legend.key.width = unit(0.2, "cm"))

palette2 <- c("#41b6c4","#253494")
palette4 <- c("#a1dab4","#41b6c4","#2c7fb8","#253494")
palette5 <- c("#ffffcc","#a1dab4","#41b6c4","#2c7fb8","#253494")
palette10 <- c("#f7fcf0","#e0f3db","#ccebc5","#a8ddb5","#7bccc4",
               "#4eb3d3","#2b8cbe","#0868ac","#084081","#f7fcf0")
#this function converts a column in to quintiles. It is used for mapping.
quintileBreaks <- function(df,variable) {
    as.character(quantile(df[[variable]],
                          c(.01,.2,.4,.6,.8),na.rm=T))
}

#This function can be used to convert a polygon sf to centroids xy coords.
xyC <- function(aPolygonSF) {
  as.data.frame(
    cbind(x=st_coordinates(st_centroid(aPolygonSF))[,1],
          y=st_coordinates(st_centroid(aPolygonSF))[,2]))
} 

#this function convert a raster to a data frame so it can be plotted in ggplot
rast <- function(inRaster) {
  data.frame(
    xyFromCell(inRaster, 1:ncell(inRaster)), 
    value = getValues(inRaster)) }

1: Set up

dvMSA <- 
  st_read("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/dv_counties/dv_counties.shp") 

lc_2011 = raster("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/lc_2011_clip_Resample.tif")
lc_2021 = raster("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/lc_2021_clip_Resample.tif")

lc_change <- lc_2011+lc_2021

2: Calculating Land Cover Change

#creating a matrix to classify urban and non-urban land use
reclassMatrix <- 
  matrix(c(
    0,12,0,
    12,24,1,
    24,Inf,0),
  ncol=3, byrow=T)
# reclassifying land cover for both years
developed_2011 <- 
  reclassify(lc_2011,reclassMatrix)

developed_2021 <- 
  reclassify(lc_2021,reclassMatrix)
# calculating land use change from 2011 to 2021
development_change <- developed_2011+developed_2021

# the values that are =1 indicate that there was CHANGE between 2011 and 2021
# histogram shows that there were very few values in the study area
hist(development_change)

# reclassifying values that are not equal to 1 to NA in development change
# this means that values 0 and 2 become NA
development_change[development_change != 1] <- NA

# plotting the values of development change = 1 spatially
ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data=rast(development_change) %>% na.omit, 
              aes(x,y,fill=as.factor(value))) +
  scale_fill_viridis(discrete=TRUE, name ="Land Cover\nChange") + 
  labs(title="Development land use change") +
  mapTheme

2.1: Creating the fishnet

dvMSA_fishnet <- 
  st_make_grid(dvMSA, 500) %>%
  st_sf()

dvMSA_fishnet <-
  dvMSA_fishnet[dvMSA,]
ggplot() +
  geom_sf(data=dvMSA_fishnet) +
  labs(title="Fishnet, 500 Foot Resolution") +
  mapTheme

#original version
changePoints <-
  rasterToPoints(development_change) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

# changed this naming from lc_change to layer then converted NA to 0
# because in fishnet here, it's called layer not lc_change
fishnet <- 
  aggregate(changePoints, dvMSA_fishnet, sum) %>%
  mutate(layer = ifelse(is.na(layer),0,1),
         layer = as.factor(layer))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)$x, y=xyC(fishnet)$y, colour=layer)) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name = "") +
  labs(title = "Land Cover Development Change", subtitle = "As fishnet centroids") +
  mapTheme

2.2: Plotting Land Cover in 2011

ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data=rast(lc_2011) %>% na.omit %>% filter(value > 0), 
              aes(x,y,fill=as.factor(value))) +
  scale_fill_viridis(discrete=TRUE, name ="") +
  labs(title = "Land Cover, 2011") +
  mapTheme +
  theme(legend.direction="horizontal")

The table below shows the approach taken to recoded existing land cover codes into the categories used in our analysis. In the code block below new rasters are generated and names are applied. Naming ensures that when the raster is integrated with the fishnet, the column reflects the appropriate raster.

Old_Classification New_Classification
Open Space as well as Low, Medium and High Intensity Development Developed
Deciduous, Evergreen, and Mixed Forest Forest
Pasture/Hay and Cultivated Crops Farm
Woody and Emergent Herbaceous Wetlands Woodlands
Barren Land, Dwarf Scrub, and Grassland/Herbaceous Other Undeveloped
Water Water
developed <- lc_2011 == 21 | lc_2011 == 22 | lc_2011 == 23 | lc_2011 == 24
forest <- lc_2011 == 41 | lc_2011 == 42 | lc_2011 == 43 
farm <- lc_2011 == 81 | lc_2011 == 82 
wetlands <- lc_2011 == 90 | lc_2011 == 95 
otherUndeveloped <- lc_2011 == 52 | lc_2011 == 71 | lc_2011 == 31 
water <- lc_2011 == 11

names(developed) <- "developed"
names(forest) <- "forest"
names(farm) <- "farm"
names(wetlands) <- "wetlands"
names(otherUndeveloped) <- "otherUndeveloped"
names(water) <- "water"
aggregateRaster <- function(inputRasterList, theFishnet) {
  #create an empty fishnet with the same dimensions as the input fishnet
  theseFishnets <- theFishnet %>% dplyr::select()
  #for each raster in the raster list
  for (i in inputRasterList) {
  #create a variable name corresponding to the ith raster
  varName <- names(i)
  #convert raster to points as an sf
    thesePoints <-
      rasterToPoints(i) %>%
      as.data.frame() %>%
      st_as_sf(coords = c("x", "y"), crs = st_crs(theFishnet)) %>%
      filter(.[[1]] == 1)
  #aggregate to the fishnet
    thisFishnet <-
      aggregate(thesePoints, theFishnet, length) %>%
      mutate(!!varName := ifelse(is.na(.[[1]]),0,1))
  #add to the larger fishnet
    theseFishnets <- cbind(theseFishnets,thisFishnet)
  }
  #output all aggregates as one large fishnet
   return(theseFishnets)
  }
theRasterList <- c(developed,forest,farm,wetlands,otherUndeveloped,water)

aggregatedRasters <-
  aggregateRaster(theRasterList, dvMSA_fishnet) %>%
  dplyr::select(developed,forest,farm,wetlands,otherUndeveloped,water) %>%
  mutate_if(is.numeric,as.factor)

aggregatedRasters %>%
  gather(var,value,developed:water) %>%
  st_cast("POLYGON") %>%    #just to make sure no weird geometries slipped in
  mutate(X = xyC(.)$x,
         Y = xyC(.)$y) %>%
  ggplot() +
    geom_sf(data=dvMSA) +
    geom_point(aes(X,Y, colour=as.factor(value))) +
    facet_wrap(~var) +
    scale_colour_manual(values = palette2,
                        labels=c("Other","Land Cover"),
                        name = "") +
    labs(title = "Land Cover Types, 2011",
         subtitle = "As fishnet centroids") +
   mapTheme

3: Pulling Demographic Data

# setting up api key 
census_api_key("b83a23afee4a8ed0fa131e449869e6577b87151e", overwrite = TRUE, install = TRUE)

Delaware Valley MSA, Pennsylvania Counties:

  • Bucks County
  • Chester County
  • Delaware County
  • Montgomery County
  • Philadelphia County
# pulling data from census for 2011
dvpop_2011 <- get_acs(geography = "tract", 
          variables = c("B01003_001E"), 
          year = 2011, 
          state = "PA", 
          county = c("Bucks", "Chester", "Delaware", "Montgomery", "Philadelphia"),
          geometry = TRUE, 
          output = "wide") %>%
  rename(pop_2011 = B01003_001E) %>%
  dplyr::select(GEOID, NAME, pop_2011, geometry)%>%
  st_transform(st_crs(dvMSA_fishnet))


# pulling data from census for 2021
dvpop_2021 <- get_acs(geography = "tract", 
          variables = c("B01003_001E"), 
          year = 2021, 
          state = "PA", 
          county = c("Bucks", "Chester", "Delaware", "Montgomery", "Philadelphia"),
          geometry = TRUE, 
          output = "wide") %>%
  rename(pop_2021 = B01003_001E) %>%
  dplyr::select(GEOID, NAME, pop_2021, geometry)%>%
  st_transform(st_crs(dvMSA_fishnet))




## grid arrange tract 2011 v 2021
# WHY are values NA????
grid.arrange(
ggplot() +
  geom_sf(data = dvpop_2011, aes(fill=factor(ntile(pop_2011,5))), colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=quintileBreaks(dvpop_2011,"pop_2011"),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delware Valley PA counties by tract: 2011") +
  mapTheme,

ggplot() +
  geom_sf(data = dvpop_2021, aes(fill=factor(ntile(pop_2021,5))), colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=quintileBreaks(dvpop_2021,"pop_2021"),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delware Valley PA counties by tract: 2021") +
  mapTheme, ncol=2)

3.1: Interpolating Population and Fishnet

dvMSA_fishnet <-
  dvMSA_fishnet %>%
  rownames_to_column("fishnetID") %>% 
  mutate(fishnetID = as.numeric(fishnetID)) %>%
  dplyr::select(fishnetID)

fishnetPopulation11 <-
  st_interpolate_aw(dvpop_2011["pop_2011"], dvMSA_fishnet, extensive=TRUE) %>%
  as.data.frame(.) %>%
  rownames_to_column(var = "fishnetID") %>%
  left_join(dvMSA_fishnet %>%
              mutate(fishnetID = as.character(fishnetID)),
            ., by=c("fishnetID"='fishnetID')) %>% 
  mutate(pop_2011 = replace_na(pop_2011,0)) %>%
  dplyr::select(pop_2011)

fishnetPopulation21 <-
  st_interpolate_aw(dvpop_2021["pop_2021"],dvMSA_fishnet, extensive=TRUE) %>%
  as.data.frame(.) %>%
  rownames_to_column(var = "fishnetID") %>%
  left_join(dvMSA_fishnet %>%
              mutate(fishnetID = as.character(fishnetID)),
            ., by=c("fishnetID"='fishnetID')) %>% 
  mutate(pop_2021 = replace_na(pop_2021,0)) %>%
  dplyr::select(pop_2021)

fishnetPopulation <- 
  cbind(fishnetPopulation11,fishnetPopulation21) %>%
  dplyr::select(pop_2011,pop_2021) %>%
  mutate(pop_Change = pop_2021 - pop_2011)
grid.arrange(
ggplot() +
  geom_sf(data=dvpop_2021, aes(fill=factor(ntile(pop_2021,5))),colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=substr(quintileBreaks(dvpop_2021,"pop_2021"),1,4),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delaware Valley MSA, PA: 2021",
       subtitle="Represented as tracts; Boundaries omitted") +
  mapTheme,

ggplot() +
  geom_sf(data=fishnetPopulation, aes(fill=factor(ntile(pop_2021,5))),colour=NA) +
  scale_fill_manual(values = palette5,
                   labels=substr(quintileBreaks(fishnetPopulation,"pop_2021"),1,4),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delaware Valley MSA, PA: 2021",
       subtitle="Represented as fishnet gridcells; Boundaries omitted") +
  mapTheme, ncol=2)

4: Highway Distance

#dvHighways <-
#  st_read("C:/Users/ferna/OneDrive/Documents/ArcGIS/Projects/CPLN 6750/HW5/dv_roads.geojson") %>%
#  st_transform(st_crs(dvMSA)) %>%
#  st_intersection(dvMSA)

dvHighways <-
  st_read("/Users/luyiiwong/Documents/GitHub/LandUseModeling_HW5/DelawareValley/dv_roads.geojson") %>%
  st_transform(st_crs(dvMSA)) %>%
  st_intersection(dvMSA)
ggplot() +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],colour=layer),size=1.5) +
  geom_sf(data=dvHighways) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","New Development")) +
  labs(title = "New Development and Highways",
       subtitle = "As fishnet centroids") +
  mapTheme

emptyRaster <- lc_change
emptyRaster[] <- NA

dvHighways_spdf <- as(dvHighways, "Spatial")
highway_raster <- rasterize(dvHighways, emptyRaster)

#highway_raster <- 
  #as(dvHighways,'Spatial') %>%
  #rasterize(.,emptyRaster)

highway_raster_distance <- distance(highway_raster)
names(highway_raster_distance) <- "distance_highways"

highwayPoints <-
  rasterToPoints(highway_raster_distance) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

highwayPoints_fishnet <- 
  aggregate(highwayPoints, dvMSA_fishnet, mean) %>%
  mutate(distance_highways = ifelse(is.na(distance_highways),0,distance_highways))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=highwayPoints_fishnet, aes(x=xyC(highwayPoints_fishnet)[,1], 
                                             y=xyC(highwayPoints_fishnet)[,2], 
                 colour=factor(ntile(distance_highways,5))),size=1.5) +
  scale_colour_manual(values = palette5,
                      labels=substr(quintileBreaks(highwayPoints_fishnet,"distance_highways"),1,8),
                      name="Quintile/nBreaks") +
  geom_sf(data=dvHighways, colour = "red") +
  labs(title = "Distance to Highways",
       subtitle = "As fishnet centroids; Highways visualized in red") +
  mapTheme

5: Spatial lag

nn_function <- function(measureFrom,measureTo,k) {
  #convert the sf layers to matrices
  measureFrom_Matrix <-
    as.matrix(measureFrom)
  measureTo_Matrix <-
    as.matrix(measureTo)
  nn <-   
    get.knnx(measureTo, measureFrom, k)$nn.dist
    output <-
    as.data.frame(nn) %>%
    rownames_to_column(var = "thisPoint") %>%
    gather(points, point_distance, V1:ncol(.)) %>%
    arrange(as.numeric(thisPoint)) %>%
    group_by(thisPoint) %>%
    summarize(pointDistance = mean(point_distance)) %>%
    arrange(as.numeric(thisPoint)) %>% 
    dplyr::select(-thisPoint) %>%
    pull()
  
  return(output)  
}
fishnet$lagDevelopment <-
    nn_function(xyC(fishnet),
                xyC(filter(aggregatedRasters,developed==1)),
                2)

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2], 
                 colour= log(lagDevelopment), size=.001)) +
  labs(title = "Spatial Lag to 2011 Development",
       subtitle = "As fishnet centroids")

#how should we interpret this?
hist(fishnet$lagDevelopment)

# make histogram
# try viridis
# log color/lagdevelopment
# filter out og developed cells 

6: Create MSA Counties

options(tigris_class = "sf")

studyAreaCounties <- 
  counties("Pennsylvania") %>%
  st_transform(st_crs(dvMSA)) %>%
  dplyr::select(NAME) %>%
  .[st_buffer(dvMSA,-500), , op=st_intersects]
ggplot() +
  geom_sf(data=studyAreaCounties) +
  labs(title = "Study Area Counties") +
  mapTheme

7: Create the Final Dataset

Once we join the data set with out county boundaries, all the lc_change =1 is dropped

# can we go through this code - specifically the mutate function
# making sure logic is right, that change is from non-developed to developed
# basically ignoring classifications that are wrong
dat <- 
  cbind(fishnet, highwayPoints_fishnet, fishnetPopulation, aggregatedRasters)%>%
  dplyr::select(layer, developed, forest, farm, wetlands, otherUndeveloped, water,
                pop_2011, pop_2021, pop_Change, distance_highways,lagDevelopment) %>%
  st_join(studyAreaCounties) %>%
  mutate(developed10 = ifelse(layer == 1 & developed == 1, 0, developed)) %>%
  filter(water == 0) 

8: Exploratroy Analysis

It seems like land change only occured from undeveloped to water. Therefore, there is no new development…

dat %>%
  dplyr::select(distance_highways,lagDevelopment,layer) %>%
  gather(Variable, Value, -layer, -geometry) %>%
  ggplot(., aes(layer, Value, fill=layer)) + 
    geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
    facet_wrap(~Variable) +
    scale_fill_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name="") +
    labs(title="New Development as a Function of the Continuous Variables") +
    plotTheme 

dat %>%
  dplyr::select(pop_2011,pop_2021,pop_Change,layer) %>%
  gather(Variable, Value, -layer, -geometry) %>%
  ggplot(., aes(layer, Value, fill=layer)) + 
    geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
    facet_wrap(~Variable) +
    scale_fill_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name="") +
    labs(title="New Development as a Function of Factor Variables") +
    plotTheme

dat %>%
  dplyr::select(layer:otherUndeveloped,developed) %>%
  gather(Land_Cover_Type, Value, -layer, -geometry) %>%
   st_set_geometry(NULL) %>%
     group_by(layer, Land_Cover_Type) %>%
     summarize(n = sum(as.numeric(Value))) %>%
     ungroup() %>%
    mutate(Conversion_Rate = paste0(round(100 * n/sum(n), 2), "%")) %>%
    filter(layer == 1) %>%
  dplyr::select(Land_Cover_Type,Conversion_Rate) %>%
  kable() %>% kable_styling(full_width = F)
Land_Cover_Type Conversion_Rate
developed 0.01%
farm 0.43%
forest 0.25%
otherUndeveloped 0.05%
wetlands 0.01%

9: Predicting for 2021 Model

set.seed(3456)
trainIndex <- 
  createDataPartition(dat$developed, p = .50,
                                  list = FALSE,
                                  times = 1)
datTrain <- dat[ trainIndex,]
datTest  <- dat[-trainIndex,]
Model1 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped, 
              family="binomial"(link="logit"), data = datTrain)

Model2 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment, 
              family="binomial"(link="logit"), data = datTrain)
              
Model3 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_2011, 
              family="binomial"(link="logit"), data = datTrain)          
              
Model4 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_2011 + 
              pop_2021, 
              family="binomial"(link="logit"), data = datTrain)              
            
Model5 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change, 
              family="binomial"(link="logit"), data = datTrain)              
              
Model6 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change + 
              distance_highways, 
              family="binomial"(link="logit"), data = datTrain) 
modelList <- paste0("Model", 1:6)
map_dfc(modelList, function(x)pR2(get(x)))[4,] %>%
  setNames(paste0("Model",1:6)) %>%
  gather(Model,McFadden) %>%
  ggplot(aes(Model,McFadden)) +
    geom_bar(stat="identity") +
    labs(title= "McFadden R-Squared by Model") +
    plotTheme

testSetProbs <- 
  data.frame(class = datTest$layer,
             probs = predict(Model6, datTest, type="response")) 
  
ggplot(testSetProbs, aes(probs)) +
  geom_density(aes(fill=class), alpha=0.5) +
  scale_fill_manual(values = palette2,
                    labels=c("No Change","New Development")) +
  labs(title = "Histogram of test set predicted probabilities",
       x="Predicted Probabilities",y="Density") +
  plotTheme

9.1: Accuracy

options(yardstick.event_first = FALSE)

testSetProbs <- 
  testSetProbs %>% 
  mutate(predClass_05 = as.factor(ifelse(testSetProbs$probs >= 0.05 ,1,0)),
         predClass_17 = as.factor(ifelse(testSetProbs$probs >= 0.17 ,1,0))) 

testSetProbs %>%
  dplyr::select(-probs) %>%
  gather(Variable, Value, -class) %>%
  group_by(Variable) %>%
  summarize(Sensitivity = round(yardstick::sens_vec(class,factor(Value)),2),
            Specificity = round(yardstick::spec_vec(class,factor(Value)),2),
            Accuracy = round(yardstick::accuracy_vec(class,factor(Value)),2)) %>% 
  kable() %>%
  kable_styling(full_width = F)
Variable Sensitivity Specificity Accuracy
predClass_05 0.99 0.03 0.99
predClass_17 1.00 0.00 0.99
predsForMap <-         
  dat %>%
    mutate(probs = predict(Model6, dat, type="response") ,
           Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
           Threshold_17_Pct =  as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
    dplyr::select(layer,Threshold_5_Pct,Threshold_17_Pct) %>%
    gather(Variable,Value, -geometry) %>%
    st_cast("POLYGON")
ggplot() +
  geom_point(data=predsForMap, aes(x=xyC(predsForMap)[,1], y=xyC(predsForMap)[,2], colour=Value)) +
  facet_wrap(~Variable) +
  scale_colour_manual(values = palette2, labels=c("No Change","New Development"),
                      name="") +
  labs(title="Development Predictions - Low Threshold") + 
  mapTheme

ConfusionMatrix.metrics <-
  dat %>%
    mutate(probs = predict(Model6, dat, type="response") ,
           Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
           Threshold_17_Pct =  as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
    mutate(TrueP_05 = ifelse(layer  == 1 & Threshold_5_Pct == 1, 1,0),
           TrueN_05 = ifelse(layer  == 0 & Threshold_5_Pct == 0, 1,0),
           TrueP_17 = ifelse(layer  == 1 & Threshold_17_Pct == 1, 1,0),
           TrueN_17 = ifelse(layer  == 0 & Threshold_17_Pct == 0, 1,0)) %>%
    dplyr::select(., starts_with("True")) %>%
    gather(Variable, Value, -geometry) %>%
    st_cast("POLYGON") 
ggplot(data=ConfusionMatrix.metrics) +
  geom_point(aes(x=xyC(ConfusionMatrix.metrics)[,1], 
                 y=xyC(ConfusionMatrix.metrics)[,2], colour = as.factor(Value))) +
  facet_wrap(~Variable) +
  scale_colour_manual(values = palette2, labels=c("Correct","Incorrect"),
                       name="") +
  labs(title="Development Predictions - Low Threshold") + mapTheme

10: Predicting Land Cover Demand for 2031

dat <-
  dat %>%
  mutate(lagDevelopment = nn_function(xyC(.), xyC(filter(.,developed10 == 1)),2))
#CHANGE THIS CODE
countyPopulation_2031 <- 
  data.frame(
   NAME = 
     c("Philadelphia","Chester","Montgomery","Bucks","Delaware"),
   county_projection_2031 = 
     c(1643971, 599932, 884387, 669299, 577248)) %>%
  #CHANGE LINE ABOVE - from a study DVRPC population projection
   left_join(
     dat %>%
       st_set_geometry(NULL) %>%
       group_by(NAME) %>%
       summarize(county_population_2021 = round(sum(pop_2021))))

countyPopulation_2031 %>%
  gather(Variable,Value, -NAME) %>%
  ggplot(aes(reorder(NAME,-Value),Value)) +
  geom_bar(aes(fill=Variable), stat = "identity", position = "dodge") +
  scale_fill_manual(values = palette2,
                    labels=c("2021","2031"),
                    name="Population") +
  labs(title="Population Change by County: 2021 - 2031",
       x="County", y="Population") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  plotTheme

10.1: Predicting Development Demand

# section edited
dat_infill <-
  dat %>%
  #calculate population change
    left_join(countyPopulation_2031) %>%
    mutate(proportion_of_county_pop = pop_2021 / county_population_2021,
           pop_2031.infill = proportion_of_county_pop * county_projection_2031,
           pop_Change = round(pop_2031.infill - pop_2021),2) %>%
    dplyr::select(-county_projection_2031, -county_population_2021, 
                  -proportion_of_county_pop, -pop_2031.infill) %>%
  #predict for 2031
    mutate(predict_2031.infill = predict(Model6,. , type="response"))

dat_infill %>%
  ggplot() +  
  geom_point(aes(x=xyC(dat_infill)[,1], y=xyC(dat_infill)[,2], colour = factor(ntile(predict_2031.infill,5)))) +
  scale_colour_manual(values = palette5,
                    labels=substr(quintileBreaks(dat_infill,"predict_2031.infill"),1,4),
                    name="Quintile\nBreaks") +
  geom_sf(data=studyAreaCounties, fill=NA, colour="black", size=1) +
  labs(title= "Development Demand in 2031: Predicted Probabilities") +
  mapTheme

11: Comparing Predicted Development Demand & Environmental Sensitivity

11.2: 2021 Land Cover Data

developed21 <- lc_2021 == 21 | lc_2021 == 22 | lc_2021 == 23 | lc_2021 == 24
forest21 <- lc_2021 == 41 | lc_2021 == 42 | lc_2021 == 43 
farm21 <- lc_2021 == 81 | lc_2021 == 82 
wetlands21 <- lc_2021 == 90 | lc_2021 == 95 
otherUndeveloped21 <- lc_2021 == 52 | lc_2021 == 71 | lc_2021 == 31 
water21 <- lc_2021 == 11

names(developed21) <- "developed21"
names(forest21) <- "forest21"
names(farm21) <- "farm21"
names(wetlands21) <- "wetlands21"
names(otherUndeveloped21) <- "otherUndeveloped21"
names(water21) <- "water21"

ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data = rbind(rast(lc_2011) %>% mutate(label = "2011"),
                           rast(lc_2021) %>% mutate(label = "2021")) %>% 
              na.omit %>% filter(value > 0), 
              aes(x,y,fill=as.factor(value))) +
  facet_wrap(~label) +
  scale_fill_viridis(discrete=TRUE, name ="") +
  labs(title = "Land Cover, 2011 & 2021") +
  mapTheme + theme(legend.position = "none")

theRasterList21 <- c(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21)

dat2 <-
  aggregateRaster(theRasterList21, dat) %>%
  dplyr::select(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat) %>%
  st_sf() %>%
  st_cast("POLYGON")

dat2 %>%
  gather(var,value,developed21:water21) %>%
  st_centroid() %>%
  mutate(X = st_coordinates(.)[,1],
         Y = st_coordinates(.)[,2]) %>%
  ggplot() +
    geom_sf(data=dvMSA) +
    geom_point(aes(X,Y, colour=as.factor(value))) +
    facet_wrap(~var) +
    scale_colour_manual(values = palette2,
                        labels=c("Other","Land Cover"),
                        name = "") +
    labs(title = "Land Cover Types, 2021",
         subtitle = "As fishnet centroids") +
   mapTheme

11.3: Sensitive Land Cover Lost

dat2 <-
  dat2 %>%
   mutate(sensitive_lost21 = ifelse(forest == 1 & forest21 == 0 |
                                    wetlands == 1 & wetlands21 == 0,1,0))
                      
ggplot() +
  geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitive_lost21))) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","Sensitive Lost"),
                      name = "") +
  labs(title = "Sensitive lands lost: 2011 - 2021",
       subtitle = "As fishnet centroids") +
  mapTheme

11.4: Landscape Fragmentation

sensitiveRegions <- 
  raster::clump(wetlands21 + forest21) %>%
  rasterToPolygons() %>%
  st_as_sf() %>%
  group_by(clumps) %>% 
  summarize() %>%
    mutate(Acres = as.numeric(st_area(.) * 0.0000229568)) %>%
    filter(Acres > 3954)  %>%
  dplyr::select() %>%
  raster::rasterize(.,emptyRaster) 
sensitiveRegions[sensitiveRegions > 0] <- 1  
names(sensitiveRegions) <- "sensitiveRegions"

dat2 <-
  aggregateRaster(c(sensitiveRegions), dat2) %>%
  dplyr::select(sensitiveRegions) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat2) %>%
  st_sf()

ggplot() +
  geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitiveRegions))) +
  scale_colour_manual(values = palette2,
                      labels=c("Other","Sensitive Regions"),
                      name="") +
  labs(title = "Sensitive regions",
       subtitle = "Continous areas of either wetlands or forests\ngreater than 1 acre") +
  mapTheme

11.5: Summarize by County

county_specific_metrics <- 
  dat2 %>%
  #predict development demand from our model
  mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
  #get a count count of grid cells by county which we can use to calculate rates below
  left_join(st_set_geometry(dat, NULL) %>% group_by(NAME) %>% summarize(count = n())) %>%
  #calculate summary statistics by county
  group_by(NAME) %>%
  summarize(Total_Farmland = sum(farm21) / max(count),
            Total_Forest = sum(forest21) / max(count),
            Total_Wetlands = sum(wetlands21) / max(count),
            Total_Undeveloped = sum(otherUndeveloped21) / max(count),
            Sensitive_Land_Lost = sum(sensitive_lost21) / max(count),
            Sensitive_Regions = sum(sensitiveRegions) / max(count),
            Mean_Development_Demand = mean(Development_Demand)) %>%
  #get population data by county
  left_join(countyPopulation_2031 %>% 
            mutate(Population_Change = county_projection_2031 - county_population_2021,
                   Population_Change_Rate = Population_Change / county_projection_2031) %>%
            dplyr::select(NAME,Population_Change_Rate)) %>%
  na.omit()
county_specific_metrics %>%
  gather(Variable, Value, -NAME, -geometry) %>%
  mutate(Variable = factor(Variable, levels=c("Population_Change_Rate","Mean_Development_Demand",
                                              "Total_Farmland","Total_Undeveloped","Total_Forest",
                                              "Total_Wetlands","Sensitive_Land_Lost","Sensitive_Regions",
                                              ordered = TRUE))) %>%
  mutate(Planning_Designation = case_when(
    Variable == "Population_Change_Rate" | Variable == "Mean_Development_Demand" ~ "Demand-Side",
    Variable == "Total_Farmland" | Variable == "Total_Undeveloped"               ~ "Suitable",
    TRUE                                                                         ~ "Not Suitable")) %>%
  ggplot(aes(x=Variable, y=Value, fill=Planning_Designation)) +
    geom_bar(stat="identity", position=position_dodge(), colour="black") +
    facet_wrap(~NAME, ncol=5) +
    coord_flip() +
    scale_y_continuous(breaks = seq(.25, 1, by = .25)) +
    geom_vline(xintercept = 2.5) + geom_vline(xintercept = 4.5) +
    scale_fill_manual(values=c("black","red","darkgreen")) +
    labs(title= "County Specific Allocation Metrics", subtitle= "As rates", x="Indicator", y="Rate") +
    plotTheme + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="bottom")

12: Scenario 1: Allocation

chester <-
  dat2 %>%
    mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
    filter(NAME == "Chester") 

chester_landUse <- rbind(
  filter(chester, forest21 == 1 | wetlands21 == 1 ) %>%
  dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
  filter(chester, developed21 == 1) %>%
  dplyr::select() %>% mutate(Land_Use = "Developed"))

grid.arrange(
ggplot() +
  geom_sf(data=chester, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
  geom_point(data=chester_landUse, aes(x=xyC(chester_landUse)[,1], 
                                        y=xyC(chester_landUse)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Development\nDemand",
                    labels=substr(quintileBreaks(chester,"Development_Demand"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Development Potential, 2031: Chester") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),

ggplot() +
  geom_sf(data=chester, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
  geom_point(data=chester_landUse, aes(x=xyC(chester_landUse)[,1], 
                                        y=xyC(chester_landUse)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Population\nChange",
                    labels=substr(quintileBreaks(chester,"pop_Change"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Projected Population, 2031: Chester") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)

13: Scenario 2 Transportation

13.1: Highway Distance

#dvHighways <-
#  st_read("C:/Users/ferna/OneDrive/Documents/ArcGIS/Projects/CPLN 6750/HW5/dv_roads.geojson") %>%
#  st_transform(st_crs(dvMSA)) %>%
#  st_intersection(dvMSA)

dvTransit <-
  st_read("/Users/luyiiwong/Documents/GitHub/LandUseModeling_HW5/DelawareValley/dv_transit.geojson") %>%
  st_transform(st_crs(dvMSA)) %>%
  st_intersection(dvMSA)
ggplot() +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],colour=layer),size=1.5) +
  geom_sf(data=dvTransit) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","N`w Development")) +
  labs(title = "New Development and New Transit",
       subtitle = "As fishnet centroids") +
  mapTheme

emptyRaster <- lc_change
emptyRaster[] <- NA

dvTransit_spdf <- as(dvTransit, "Spatial")
transit_raster <- rasterize(dvTransit, emptyRaster)

#highway_raster <- 
  #as(dvTransit,'Spatial') %>%
  #rasterize(.,emptyRaster)

transit_raster_distance <- distance(transit_raster)
names(transit_raster_distance) <- "distance_transit"

transitPoints <-
  rasterToPoints(transit_raster_distance) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

transitPoints_fishnet <- 
  aggregate(transitPoints, dvMSA_fishnet, mean) %>%
  mutate(distance_transit = ifelse(is.na(distance_transit),0,distance_transit))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=transitPoints_fishnet, aes(x=xyC(transitPoints_fishnet)[,1], 
                                             y=xyC(transitPoints_fishnet)[,2], 
                 colour=factor(ntile(distance_transit,5))),size=1.5) +
  scale_colour_manual(values = palette5,
                      labels=substr(quintileBreaks(transitPoints_fishnet,"distance_transit"),1,8),
                      name="Quintile/nBreaks") +
  geom_sf(data=dvTransit, colour = "red") +
  labs(title = "Distance to transit",
       subtitle = "As fishnet centroids; transit visualized in red") +
  mapTheme

13.2: Create a new dataset for the model

changing the distance from highway to distance to transit

dat.2 <- 
  cbind(fishnet, transitPoints_fishnet, fishnetPopulation, aggregatedRasters)%>%
  dplyr::select(layer, developed, forest, farm, wetlands, otherUndeveloped, water,
                pop_2011, pop_2021, pop_Change, distance_transit,lagDevelopment) %>%
  st_join(studyAreaCounties) %>%
  mutate(developed10 = ifelse(layer == 1 & developed == 1, 0, developed)) %>%
  filter(water == 0) 

13.3: Create training set

set.seed(3456)
trainIndex <- 
  createDataPartition(dat$developed, p = .50,
                                  list = FALSE,
                                  times = 1)
transit.datTrain <- dat.2[ trainIndex,]
transit.datTest  <- dat.2[-trainIndex,]
# test model with transit
transitModel <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change + 
              distance_transit, 
              family="binomial"(link="logit"), data = transit.datTrain) 
summary(transitModel)
## 
## Call:
## glm(formula = layer ~ wetlands + forest + farm + otherUndeveloped + 
##     lagDevelopment + pop_Change + distance_transit, family = binomial(link = "logit"), 
##     data = transit.datTrain)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -9.258e+00  1.385e+00  -6.686 2.30e-11 ***
## wetlands1          5.393e+00  1.537e+00   3.509 0.000449 ***
## forest1            6.425e+00  1.377e+00   4.665 3.08e-06 ***
## farm1              6.984e+00  1.381e+00   5.058 4.24e-07 ***
## otherUndeveloped1  7.782e+00  1.400e+00   5.560 2.69e-08 ***
## lagDevelopment    -2.151e-03  4.284e-04  -5.020 5.16e-07 ***
## pop_Change         9.339e-03  2.463e-03   3.791 0.000150 ***
## distance_transit  -2.513e-04  9.403e-05  -2.672 0.007534 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1716.3  on 19904  degrees of freedom
## Residual deviance: 1461.8  on 19897  degrees of freedom
## AIC: 1477.8
## 
## Number of Fisher Scoring iterations: 12
#variables are highly significant
#r-square value: 14.8%
testSetProbs.2 <- 
  data.frame(class = datTest$layer,
             probs = predict(transitModel, transit.datTest, type="response")) 
  
ggplot(testSetProbs.2, aes(probs)) +
  geom_density(aes(fill=class), alpha=0.5) +
  scale_fill_manual(values = palette2,
                    labels=c("No Change","New Development")) +
  labs(title = "Histogram of test set predicted probabilities",
       x="Predicted Probabilities",y="Density") +
  plotTheme

13.4: Creating new dataframe for pop projection

dat.new <-
  aggregateRaster(theRasterList21, dat.2) %>%
  dplyr::select(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat.2) %>%
  st_sf() %>%
  st_cast("POLYGON")

# considering land fragmentation
dat.new <-
  aggregateRaster(c(sensitiveRegions), dat.new) %>%
  dplyr::select(sensitiveRegions) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat.2) %>%
  st_sf()

13.5: Land allocation - population projection and development forcast

chester.new <-
  dat.new %>%
    mutate(Development_Demand = predict(transitModel, dat.new, type="response")) %>%
    filter(NAME == "Chester") 

chester_landUse.new <- rbind(
  filter(chester, forest21 == 1 | wetlands21 == 1 ) %>%
  dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
  filter(chester, developed21 == 1) %>%
  dplyr::select() %>% mutate(Land_Use = "Developed"))

grid.arrange(
ggplot() +
  geom_sf(data=chester.new, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
  geom_point(data=chester_landUse.new, aes(x=xyC(chester_landUse.new)[,1], 
                                        y=xyC(chester_landUse.new)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Development\nDemand",
                    labels=substr(quintileBreaks(chester.new,"Development_Demand"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Development Potential, 2031: Chester",
       subtitle = "Additional Transit") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),

ggplot() +
  geom_sf(data=chester.new, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
  geom_point(data=chester_landUse.new, aes(x=xyC(chester_landUse.new)[,1], 
                                        y=xyC(chester_landUse.new)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Population\nChange",
                    labels=substr(quintileBreaks(chester.new,"pop_Change"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Projected Population, 2031: Chester",
       subtitle = "Additional Transit") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)

---
title: "Delaware Valley MSA Urban Growth Model"
author: "Lu Yii Wong & Javier Fernandez"
date: "2024-05-10"
output:
  html_document:
    toc: true
    toc_float: true
    code_folding: hide
    code_download: true
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r load_packages, message=FALSE, warning=FALSE, results = "hide"}
library(tidyverse)
library(sf)
library(raster)
library(knitr)
library(kableExtra)
library(tidycensus)
library(tigris)
library(FNN)
#library(QuantPsyc) # JE Note: in R 4.1, QuantPsyc package not available.
library(caret)
library(yardstick)
library(pscl)
library(plotROC) 
library(ggrepel)
library(pROC)
library(grid)
library(gridExtra)
library(viridis)
library(igraph)

plotTheme <- theme(
  plot.title =element_text(size=12),
  plot.subtitle = element_text(size=8),
  plot.caption = element_text(size = 6),
  axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
  axis.text.y = element_text(size = 10),
  axis.title.y = element_text(size = 10),
  # Set the entire chart region to blank
  panel.background=element_blank(),
  plot.background=element_blank(),
  #panel.border=element_rect(colour="#F0F0F0"),
  # Format the grid
  panel.grid.major=element_line(colour="#D0D0D0",size=.75),
  axis.ticks=element_blank())

mapTheme <- theme(plot.title =element_text(size=12),
                  plot.subtitle = element_text(size=8),
                  plot.caption = element_text(size = 6),
                  axis.line=element_blank(),
                  axis.text.x=element_blank(),
                  axis.text.y=element_blank(),
                  axis.ticks=element_blank(),
                  axis.title.x=element_blank(),
                  axis.title.y=element_blank(),
                  panel.background=element_blank(),
                  panel.border=element_blank(),
                  panel.grid.major=element_line(colour = 'transparent'),
                  panel.grid.minor=element_blank(),
                  legend.direction = "vertical", 
                  legend.position = "right",
                  plot.margin = margin(1, 1, 1, 1, 'cm'),
                  legend.key.height = unit(1, "cm"), legend.key.width = unit(0.2, "cm"))

palette2 <- c("#41b6c4","#253494")
palette4 <- c("#a1dab4","#41b6c4","#2c7fb8","#253494")
palette5 <- c("#ffffcc","#a1dab4","#41b6c4","#2c7fb8","#253494")
palette10 <- c("#f7fcf0","#e0f3db","#ccebc5","#a8ddb5","#7bccc4",
               "#4eb3d3","#2b8cbe","#0868ac","#084081","#f7fcf0")
```

```{r, warning = FALSE, message = FALSE}
#this function converts a column in to quintiles. It is used for mapping.
quintileBreaks <- function(df,variable) {
    as.character(quantile(df[[variable]],
                          c(.01,.2,.4,.6,.8),na.rm=T))
}

#This function can be used to convert a polygon sf to centroids xy coords.
xyC <- function(aPolygonSF) {
  as.data.frame(
    cbind(x=st_coordinates(st_centroid(aPolygonSF))[,1],
          y=st_coordinates(st_centroid(aPolygonSF))[,2]))
} 

#this function convert a raster to a data frame so it can be plotted in ggplot
rast <- function(inRaster) {
  data.frame(
    xyFromCell(inRaster, 1:ncell(inRaster)), 
    value = getValues(inRaster)) }
```

# 1: Set up

```{r load_data, warning = FALSE, message = FALSE, results = "hide"}
dvMSA <- 
  st_read("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/dv_counties/dv_counties.shp") 

lc_2011 = raster("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/lc_2011_clip_Resample.tif")
lc_2021 = raster("/Users/luyiiwong/Documents/Land Use & Environmental Modeling/Assignment5/lc_2021_clip_Resample.tif")

lc_change <- lc_2011+lc_2021

```

# 2: Calculating Land Cover Change
```{r}
#creating a matrix to classify urban and non-urban land use
reclassMatrix <- 
  matrix(c(
    0,12,0,
    12,24,1,
    24,Inf,0),
  ncol=3, byrow=T)
```

```{r, warning = FALSE, message = FALSE}
# reclassifying land cover for both years
developed_2011 <- 
  reclassify(lc_2011,reclassMatrix)

developed_2021 <- 
  reclassify(lc_2021,reclassMatrix)

```

```{r, warning = FALSE, message = FALSE}
# calculating land use change from 2011 to 2021
development_change <- developed_2011+developed_2021

# the values that are =1 indicate that there was CHANGE between 2011 and 2021
# histogram shows that there were very few values in the study area
hist(development_change)
```

```{r, warning = FALSE, message = FALSE}
# reclassifying values that are not equal to 1 to NA in development change
# this means that values 0 and 2 become NA
development_change[development_change != 1] <- NA

# plotting the values of development change = 1 spatially
ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data=rast(development_change) %>% na.omit, 
              aes(x,y,fill=as.factor(value))) +
  scale_fill_viridis(discrete=TRUE, name ="Land Cover\nChange") + 
  labs(title="Development land use change") +
  mapTheme
```

## 2.1: Creating the fishnet
```{r, warning = FALSE, message = FALSE}
dvMSA_fishnet <- 
  st_make_grid(dvMSA, 500) %>%
  st_sf()

dvMSA_fishnet <-
  dvMSA_fishnet[dvMSA,]
```

```{r, warning = FALSE, message= FALSE}
ggplot() +
  geom_sf(data=dvMSA_fishnet) +
  labs(title="Fishnet, 500 Foot Resolution") +
  mapTheme
```

```{r, warning = FALSE, message = FALSE}
#original version
changePoints <-
  rasterToPoints(development_change) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

# changed this naming from lc_change to layer then converted NA to 0
# because in fishnet here, it's called layer not lc_change
fishnet <- 
  aggregate(changePoints, dvMSA_fishnet, sum) %>%
  mutate(layer = ifelse(is.na(layer),0,1),
         layer = as.factor(layer))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)$x, y=xyC(fishnet)$y, colour=layer)) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name = "") +
  labs(title = "Land Cover Development Change", subtitle = "As fishnet centroids") +
  mapTheme
```

## 2.2: Plotting Land Cover in 2011

```{r, warning = FALSE, message = FALSE}
ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data=rast(lc_2011) %>% na.omit %>% filter(value > 0), 
              aes(x,y,fill=as.factor(value))) +
  scale_fill_viridis(discrete=TRUE, name ="") +
  labs(title = "Land Cover, 2011") +
  mapTheme +
  theme(legend.direction="horizontal")
```

The table below shows the approach taken to recoded existing land cover codes into the categories used in our analysis. In the code block below new rasters are generated and `names` are applied. Naming ensures that when the raster is integrated with the fishnet, the column reflects the appropriate raster.

| Old_Classification             | New_Classification                                  |
|--------------------------------|-----------------------------------------------------|
| Open Space as well as Low, Medium and High Intensity Development | Developed |
| Deciduous, Evergreen, and Mixed Forest |  Forest |
| Pasture/Hay and Cultivated Crops | Farm |
| Woody and Emergent Herbaceous Wetlands | Woodlands |
| Barren Land, Dwarf Scrub, and Grassland/Herbaceous | Other Undeveloped |
| Water | Water |

```{r, warning = FALSE, message = FALSE}
developed <- lc_2011 == 21 | lc_2011 == 22 | lc_2011 == 23 | lc_2011 == 24
forest <- lc_2011 == 41 | lc_2011 == 42 | lc_2011 == 43 
farm <- lc_2011 == 81 | lc_2011 == 82 
wetlands <- lc_2011 == 90 | lc_2011 == 95 
otherUndeveloped <- lc_2011 == 52 | lc_2011 == 71 | lc_2011 == 31 
water <- lc_2011 == 11

names(developed) <- "developed"
names(forest) <- "forest"
names(farm) <- "farm"
names(wetlands) <- "wetlands"
names(otherUndeveloped) <- "otherUndeveloped"
names(water) <- "water"
```

```{r, warning = FALSE, message = FALSE}

aggregateRaster <- function(inputRasterList, theFishnet) {
  #create an empty fishnet with the same dimensions as the input fishnet
  theseFishnets <- theFishnet %>% dplyr::select()
  #for each raster in the raster list
  for (i in inputRasterList) {
  #create a variable name corresponding to the ith raster
  varName <- names(i)
  #convert raster to points as an sf
    thesePoints <-
      rasterToPoints(i) %>%
      as.data.frame() %>%
      st_as_sf(coords = c("x", "y"), crs = st_crs(theFishnet)) %>%
      filter(.[[1]] == 1)
  #aggregate to the fishnet
    thisFishnet <-
      aggregate(thesePoints, theFishnet, length) %>%
      mutate(!!varName := ifelse(is.na(.[[1]]),0,1))
  #add to the larger fishnet
    theseFishnets <- cbind(theseFishnets,thisFishnet)
  }
  #output all aggregates as one large fishnet
   return(theseFishnets)
  }
```

```{r, warning = FALSE, message = FALSE}
theRasterList <- c(developed,forest,farm,wetlands,otherUndeveloped,water)

aggregatedRasters <-
  aggregateRaster(theRasterList, dvMSA_fishnet) %>%
  dplyr::select(developed,forest,farm,wetlands,otherUndeveloped,water) %>%
  mutate_if(is.numeric,as.factor)

aggregatedRasters %>%
  gather(var,value,developed:water) %>%
  st_cast("POLYGON") %>%    #just to make sure no weird geometries slipped in
  mutate(X = xyC(.)$x,
         Y = xyC(.)$y) %>%
  ggplot() +
    geom_sf(data=dvMSA) +
    geom_point(aes(X,Y, colour=as.factor(value))) +
    facet_wrap(~var) +
    scale_colour_manual(values = palette2,
                        labels=c("Other","Land Cover"),
                        name = "") +
    labs(title = "Land Cover Types, 2011",
         subtitle = "As fishnet centroids") +
   mapTheme
```

# 3: Pulling Demographic Data
```{r message = FALSE, results='hide'}
# setting up api key 
census_api_key("b83a23afee4a8ed0fa131e449869e6577b87151e", overwrite = TRUE, install = TRUE)
```
Delaware Valley MSA, Pennsylvania Counties:

- Bucks County
- Chester County
- Delaware County
- Montgomery County
- Philadelphia County

```{r message = FALSE, results='hide'}
# pulling data from census for 2011
dvpop_2011 <- get_acs(geography = "tract", 
          variables = c("B01003_001E"), 
          year = 2011, 
          state = "PA", 
          county = c("Bucks", "Chester", "Delaware", "Montgomery", "Philadelphia"),
          geometry = TRUE, 
          output = "wide") %>%
  rename(pop_2011 = B01003_001E) %>%
  dplyr::select(GEOID, NAME, pop_2011, geometry)%>%
  st_transform(st_crs(dvMSA_fishnet))


# pulling data from census for 2021
dvpop_2021 <- get_acs(geography = "tract", 
          variables = c("B01003_001E"), 
          year = 2021, 
          state = "PA", 
          county = c("Bucks", "Chester", "Delaware", "Montgomery", "Philadelphia"),
          geometry = TRUE, 
          output = "wide") %>%
  rename(pop_2021 = B01003_001E) %>%
  dplyr::select(GEOID, NAME, pop_2021, geometry)%>%
  st_transform(st_crs(dvMSA_fishnet))




## grid arrange tract 2011 v 2021
# WHY are values NA????
grid.arrange(
ggplot() +
  geom_sf(data = dvpop_2011, aes(fill=factor(ntile(pop_2011,5))), colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=quintileBreaks(dvpop_2011,"pop_2011"),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delware Valley PA counties by tract: 2011") +
  mapTheme,

ggplot() +
  geom_sf(data = dvpop_2021, aes(fill=factor(ntile(pop_2021,5))), colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=quintileBreaks(dvpop_2021,"pop_2021"),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delware Valley PA counties by tract: 2021") +
  mapTheme, ncol=2)

```

## 3.1: Interpolating Population and Fishnet
```{r, warning = FALSE, message = FALSE}
dvMSA_fishnet <-
  dvMSA_fishnet %>%
  rownames_to_column("fishnetID") %>% 
  mutate(fishnetID = as.numeric(fishnetID)) %>%
  dplyr::select(fishnetID)

fishnetPopulation11 <-
  st_interpolate_aw(dvpop_2011["pop_2011"], dvMSA_fishnet, extensive=TRUE) %>%
  as.data.frame(.) %>%
  rownames_to_column(var = "fishnetID") %>%
  left_join(dvMSA_fishnet %>%
              mutate(fishnetID = as.character(fishnetID)),
            ., by=c("fishnetID"='fishnetID')) %>% 
  mutate(pop_2011 = replace_na(pop_2011,0)) %>%
  dplyr::select(pop_2011)

fishnetPopulation21 <-
  st_interpolate_aw(dvpop_2021["pop_2021"],dvMSA_fishnet, extensive=TRUE) %>%
  as.data.frame(.) %>%
  rownames_to_column(var = "fishnetID") %>%
  left_join(dvMSA_fishnet %>%
              mutate(fishnetID = as.character(fishnetID)),
            ., by=c("fishnetID"='fishnetID')) %>% 
  mutate(pop_2021 = replace_na(pop_2021,0)) %>%
  dplyr::select(pop_2021)

fishnetPopulation <- 
  cbind(fishnetPopulation11,fishnetPopulation21) %>%
  dplyr::select(pop_2011,pop_2021) %>%
  mutate(pop_Change = pop_2021 - pop_2011)
```

```{r, warning = FALSE, message = FALSE, fig.height = 8, fig.width= 11}
grid.arrange(
ggplot() +
  geom_sf(data=dvpop_2021, aes(fill=factor(ntile(pop_2021,5))),colour=NA) +
  scale_fill_manual(values = palette5,
                    labels=substr(quintileBreaks(dvpop_2021,"pop_2021"),1,4),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delaware Valley MSA, PA: 2021",
       subtitle="Represented as tracts; Boundaries omitted") +
  mapTheme,

ggplot() +
  geom_sf(data=fishnetPopulation, aes(fill=factor(ntile(pop_2021,5))),colour=NA) +
  scale_fill_manual(values = palette5,
                   labels=substr(quintileBreaks(fishnetPopulation,"pop_2021"),1,4),
                   name="Quintile\nBreaks") +
  labs(title="Population, Delaware Valley MSA, PA: 2021",
       subtitle="Represented as fishnet gridcells; Boundaries omitted") +
  mapTheme, ncol=2)
```

# 4: Highway Distance
```{r, warning = FALSE, message = FALSE, results = "hide"}
#dvHighways <-
#  st_read("C:/Users/ferna/OneDrive/Documents/ArcGIS/Projects/CPLN 6750/HW5/dv_roads.geojson") %>%
#  st_transform(st_crs(dvMSA)) %>%
#  st_intersection(dvMSA)

dvHighways <-
  st_read("/Users/luyiiwong/Documents/GitHub/LandUseModeling_HW5/DelawareValley/dv_roads.geojson") %>%
  st_transform(st_crs(dvMSA)) %>%
  st_intersection(dvMSA)
```

```{r warning = FALSE, message= FALSE}
ggplot() +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],colour=layer),size=1.5) +
  geom_sf(data=dvHighways) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","New Development")) +
  labs(title = "New Development and Highways",
       subtitle = "As fishnet centroids") +
  mapTheme
```

```{r, warning = FALSE, message = FALSE}
emptyRaster <- lc_change
emptyRaster[] <- NA

dvHighways_spdf <- as(dvHighways, "Spatial")
highway_raster <- rasterize(dvHighways, emptyRaster)

#highway_raster <- 
  #as(dvHighways,'Spatial') %>%
  #rasterize(.,emptyRaster)

highway_raster_distance <- distance(highway_raster)
names(highway_raster_distance) <- "distance_highways"

highwayPoints <-
  rasterToPoints(highway_raster_distance) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

highwayPoints_fishnet <- 
  aggregate(highwayPoints, dvMSA_fishnet, mean) %>%
  mutate(distance_highways = ifelse(is.na(distance_highways),0,distance_highways))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=highwayPoints_fishnet, aes(x=xyC(highwayPoints_fishnet)[,1], 
                                             y=xyC(highwayPoints_fishnet)[,2], 
                 colour=factor(ntile(distance_highways,5))),size=1.5) +
  scale_colour_manual(values = palette5,
                      labels=substr(quintileBreaks(highwayPoints_fishnet,"distance_highways"),1,8),
                      name="Quintile/nBreaks") +
  geom_sf(data=dvHighways, colour = "red") +
  labs(title = "Distance to Highways",
       subtitle = "As fishnet centroids; Highways visualized in red") +
  mapTheme
```

# 5: Spatial lag
``` {r message = FALSE, results='hide', warning = FALSE}
nn_function <- function(measureFrom,measureTo,k) {
  #convert the sf layers to matrices
  measureFrom_Matrix <-
    as.matrix(measureFrom)
  measureTo_Matrix <-
    as.matrix(measureTo)
  nn <-   
    get.knnx(measureTo, measureFrom, k)$nn.dist
    output <-
    as.data.frame(nn) %>%
    rownames_to_column(var = "thisPoint") %>%
    gather(points, point_distance, V1:ncol(.)) %>%
    arrange(as.numeric(thisPoint)) %>%
    group_by(thisPoint) %>%
    summarize(pointDistance = mean(point_distance)) %>%
    arrange(as.numeric(thisPoint)) %>% 
    dplyr::select(-thisPoint) %>%
    pull()
  
  return(output)  
}
```

```{r message = FALSE, results='hide', warning = FALSE}
fishnet$lagDevelopment <-
    nn_function(xyC(fishnet),
                xyC(filter(aggregatedRasters,developed==1)),
                2)

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2], 
                 colour= log(lagDevelopment), size=.001)) +
  labs(title = "Spatial Lag to 2011 Development",
       subtitle = "As fishnet centroids")

#how should we interpret this?
hist(fishnet$lagDevelopment)
# make histogram
# try viridis
# log color/lagdevelopment
# filter out og developed cells 
```

# 6: Create MSA Counties 

```{r, warning = FALSE, message = FALSE, results = "hide"}
options(tigris_class = "sf")

studyAreaCounties <- 
  counties("Pennsylvania") %>%
  st_transform(st_crs(dvMSA)) %>%
  dplyr::select(NAME) %>%
  .[st_buffer(dvMSA,-500), , op=st_intersects]
```

```{r, warning = FALSE, message = FALSE}
ggplot() +
  geom_sf(data=studyAreaCounties) +
  labs(title = "Study Area Counties") +
  mapTheme
```

# 7: Create the Final Dataset 
Once we join the data set with out county boundaries, all the lc_change =1 is dropped
```{r}
# can we go through this code - specifically the mutate function
# making sure logic is right, that change is from non-developed to developed
# basically ignoring classifications that are wrong
dat <- 
  cbind(fishnet, highwayPoints_fishnet, fishnetPopulation, aggregatedRasters)%>%
  dplyr::select(layer, developed, forest, farm, wetlands, otherUndeveloped, water,
                pop_2011, pop_2021, pop_Change, distance_highways,lagDevelopment) %>%
  st_join(studyAreaCounties) %>%
  mutate(developed10 = ifelse(layer == 1 & developed == 1, 0, developed)) %>%
  filter(water == 0) 
```

# 8: Exploratroy Analysis
It seems like land change only occured from undeveloped to water. Therefore, there is no new development...
```{r, warning = FALSE, message = FALSE}
dat %>%
  dplyr::select(distance_highways,lagDevelopment,layer) %>%
  gather(Variable, Value, -layer, -geometry) %>%
  ggplot(., aes(layer, Value, fill=layer)) + 
    geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
    facet_wrap(~Variable) +
    scale_fill_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name="") +
    labs(title="New Development as a Function of the Continuous Variables") +
    plotTheme 
```

```{r, warning = FALSE, message = FALSE}
dat %>%
  dplyr::select(pop_2011,pop_2021,pop_Change,layer) %>%
  gather(Variable, Value, -layer, -geometry) %>%
  ggplot(., aes(layer, Value, fill=layer)) + 
    geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
    facet_wrap(~Variable) +
    scale_fill_manual(values = palette2,
                      labels=c("No Change","New Development"),
                      name="") +
    labs(title="New Development as a Function of Factor Variables") +
    plotTheme
```

```{r, warning = FALSE, message = FALSE}
dat %>%
  dplyr::select(layer:otherUndeveloped,developed) %>%
  gather(Land_Cover_Type, Value, -layer, -geometry) %>%
   st_set_geometry(NULL) %>%
     group_by(layer, Land_Cover_Type) %>%
     summarize(n = sum(as.numeric(Value))) %>%
     ungroup() %>%
    mutate(Conversion_Rate = paste0(round(100 * n/sum(n), 2), "%")) %>%
    filter(layer == 1) %>%
  dplyr::select(Land_Cover_Type,Conversion_Rate) %>%
  kable() %>% kable_styling(full_width = F)
```

# 9: Predicting for 2021 Model
```{r, warning = FALSE, message = FALSE}
set.seed(3456)
trainIndex <- 
  createDataPartition(dat$developed, p = .50,
                                  list = FALSE,
                                  times = 1)
datTrain <- dat[ trainIndex,]
datTest  <- dat[-trainIndex,]

```

```{r, warning = FALSE, message = FALSE}
Model1 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped, 
              family="binomial"(link="logit"), data = datTrain)

Model2 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment, 
              family="binomial"(link="logit"), data = datTrain)
              
Model3 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_2011, 
              family="binomial"(link="logit"), data = datTrain)          
              
Model4 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_2011 + 
              pop_2021, 
              family="binomial"(link="logit"), data = datTrain)              
            
Model5 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change, 
              family="binomial"(link="logit"), data = datTrain)              
              
Model6 <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change + 
              distance_highways, 
              family="binomial"(link="logit"), data = datTrain) 
```

```{r, warning = FALSE, message = FALSE, results='hide'}
modelList <- paste0("Model", 1:6)
map_dfc(modelList, function(x)pR2(get(x)))[4,] %>%
  setNames(paste0("Model",1:6)) %>%
  gather(Model,McFadden) %>%
  ggplot(aes(Model,McFadden)) +
    geom_bar(stat="identity") +
    labs(title= "McFadden R-Squared by Model") +
    plotTheme
```

```{r, warning = FALSE, message = FALSE}
testSetProbs <- 
  data.frame(class = datTest$layer,
             probs = predict(Model6, datTest, type="response")) 
  
ggplot(testSetProbs, aes(probs)) +
  geom_density(aes(fill=class), alpha=0.5) +
  scale_fill_manual(values = palette2,
                    labels=c("No Change","New Development")) +
  labs(title = "Histogram of test set predicted probabilities",
       x="Predicted Probabilities",y="Density") +
  plotTheme
```

## 9.1: Accuracy
```{r, warning = FALSE, message = FALSE}
options(yardstick.event_first = FALSE)

testSetProbs <- 
  testSetProbs %>% 
  mutate(predClass_05 = as.factor(ifelse(testSetProbs$probs >= 0.05 ,1,0)),
         predClass_17 = as.factor(ifelse(testSetProbs$probs >= 0.17 ,1,0))) 

testSetProbs %>%
  dplyr::select(-probs) %>%
  gather(Variable, Value, -class) %>%
  group_by(Variable) %>%
  summarize(Sensitivity = round(yardstick::sens_vec(class,factor(Value)),2),
            Specificity = round(yardstick::spec_vec(class,factor(Value)),2),
            Accuracy = round(yardstick::accuracy_vec(class,factor(Value)),2)) %>% 
  kable() %>%
  kable_styling(full_width = F)
```

```{r, warning = FALSE, message = FALSE}
predsForMap <-         
  dat %>%
    mutate(probs = predict(Model6, dat, type="response") ,
           Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
           Threshold_17_Pct =  as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
    dplyr::select(layer,Threshold_5_Pct,Threshold_17_Pct) %>%
    gather(Variable,Value, -geometry) %>%
    st_cast("POLYGON")
```

```{r, warning = FALSE, message= FALSE, fig.height = 6, fig.width= 8}
ggplot() +
  geom_point(data=predsForMap, aes(x=xyC(predsForMap)[,1], y=xyC(predsForMap)[,2], colour=Value)) +
  facet_wrap(~Variable) +
  scale_colour_manual(values = palette2, labels=c("No Change","New Development"),
                      name="") +
  labs(title="Development Predictions - Low Threshold") + 
  mapTheme
```

```{r, warning = FALSE, message = FALSE}
ConfusionMatrix.metrics <-
  dat %>%
    mutate(probs = predict(Model6, dat, type="response") ,
           Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
           Threshold_17_Pct =  as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
    mutate(TrueP_05 = ifelse(layer  == 1 & Threshold_5_Pct == 1, 1,0),
           TrueN_05 = ifelse(layer  == 0 & Threshold_5_Pct == 0, 1,0),
           TrueP_17 = ifelse(layer  == 1 & Threshold_17_Pct == 1, 1,0),
           TrueN_17 = ifelse(layer  == 0 & Threshold_17_Pct == 0, 1,0)) %>%
    dplyr::select(., starts_with("True")) %>%
    gather(Variable, Value, -geometry) %>%
    st_cast("POLYGON") 
```

```{r, warning = FALSE, message = FALSE, fig.height= 8, fig.width= 8 }
ggplot(data=ConfusionMatrix.metrics) +
  geom_point(aes(x=xyC(ConfusionMatrix.metrics)[,1], 
                 y=xyC(ConfusionMatrix.metrics)[,2], colour = as.factor(Value))) +
  facet_wrap(~Variable) +
  scale_colour_manual(values = palette2, labels=c("Correct","Incorrect"),
                       name="") +
  labs(title="Development Predictions - Low Threshold") + mapTheme
```


# 10: Predicting Land Cover Demand for 2031
```{r, warning = FALSE, message = FALSE}
dat <-
  dat %>%
  mutate(lagDevelopment = nn_function(xyC(.), xyC(filter(.,developed10 == 1)),2))
```

```{r, warning = FALSE, message = FALSE}
#CHANGE THIS CODE
countyPopulation_2031 <- 
  data.frame(
   NAME = 
     c("Philadelphia","Chester","Montgomery","Bucks","Delaware"),
   county_projection_2031 = 
     c(1643971, 599932, 884387, 669299, 577248)) %>%
  #CHANGE LINE ABOVE - from a study DVRPC population projection
   left_join(
     dat %>%
       st_set_geometry(NULL) %>%
       group_by(NAME) %>%
       summarize(county_population_2021 = round(sum(pop_2021))))

countyPopulation_2031 %>%
  gather(Variable,Value, -NAME) %>%
  ggplot(aes(reorder(NAME,-Value),Value)) +
  geom_bar(aes(fill=Variable), stat = "identity", position = "dodge") +
  scale_fill_manual(values = palette2,
                    labels=c("2021","2031"),
                    name="Population") +
  labs(title="Population Change by County: 2021 - 2031",
       x="County", y="Population") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  plotTheme
```

## 10.1: Predicting Development Demand
```{r, warning = FALSE, message = FALSE}
# section edited
dat_infill <-
  dat %>%
  #calculate population change
    left_join(countyPopulation_2031) %>%
    mutate(proportion_of_county_pop = pop_2021 / county_population_2021,
           pop_2031.infill = proportion_of_county_pop * county_projection_2031,
           pop_Change = round(pop_2031.infill - pop_2021),2) %>%
    dplyr::select(-county_projection_2031, -county_population_2021, 
                  -proportion_of_county_pop, -pop_2031.infill) %>%
  #predict for 2031
    mutate(predict_2031.infill = predict(Model6,. , type="response"))

dat_infill %>%
  ggplot() +  
  geom_point(aes(x=xyC(dat_infill)[,1], y=xyC(dat_infill)[,2], colour = factor(ntile(predict_2031.infill,5)))) +
  scale_colour_manual(values = palette5,
                    labels=substr(quintileBreaks(dat_infill,"predict_2031.infill"),1,4),
                    name="Quintile\nBreaks") +
  geom_sf(data=studyAreaCounties, fill=NA, colour="black", size=1) +
  labs(title= "Development Demand in 2031: Predicted Probabilities") +
  mapTheme
```

# 11: Comparing Predicted Development Demand & Environmental Sensitivity
## 11.2: 2021 Land Cover Data
```{r, warning = FALSE, message = FALSE}
developed21 <- lc_2021 == 21 | lc_2021 == 22 | lc_2021 == 23 | lc_2021 == 24
forest21 <- lc_2021 == 41 | lc_2021 == 42 | lc_2021 == 43 
farm21 <- lc_2021 == 81 | lc_2021 == 82 
wetlands21 <- lc_2021 == 90 | lc_2021 == 95 
otherUndeveloped21 <- lc_2021 == 52 | lc_2021 == 71 | lc_2021 == 31 
water21 <- lc_2021 == 11

names(developed21) <- "developed21"
names(forest21) <- "forest21"
names(farm21) <- "farm21"
names(wetlands21) <- "wetlands21"
names(otherUndeveloped21) <- "otherUndeveloped21"
names(water21) <- "water21"

ggplot() +
  geom_sf(data=dvMSA) +
  geom_raster(data = rbind(rast(lc_2011) %>% mutate(label = "2011"),
                           rast(lc_2021) %>% mutate(label = "2021")) %>% 
              na.omit %>% filter(value > 0), 
              aes(x,y,fill=as.factor(value))) +
  facet_wrap(~label) +
  scale_fill_viridis(discrete=TRUE, name ="") +
  labs(title = "Land Cover, 2011 & 2021") +
  mapTheme + theme(legend.position = "none")
```

```{r, warning = FALSE, message = FALSE}
theRasterList21 <- c(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21)

dat2 <-
  aggregateRaster(theRasterList21, dat) %>%
  dplyr::select(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat) %>%
  st_sf() %>%
  st_cast("POLYGON")

dat2 %>%
  gather(var,value,developed21:water21) %>%
  st_centroid() %>%
  mutate(X = st_coordinates(.)[,1],
         Y = st_coordinates(.)[,2]) %>%
  ggplot() +
    geom_sf(data=dvMSA) +
    geom_point(aes(X,Y, colour=as.factor(value))) +
    facet_wrap(~var) +
    scale_colour_manual(values = palette2,
                        labels=c("Other","Land Cover"),
                        name = "") +
    labs(title = "Land Cover Types, 2021",
         subtitle = "As fishnet centroids") +
   mapTheme
```

## 11.3: Sensitive Land Cover Lost

```{r, warning = FALSE, message = FALSE}
dat2 <-
  dat2 %>%
   mutate(sensitive_lost21 = ifelse(forest == 1 & forest21 == 0 |
                                    wetlands == 1 & wetlands21 == 0,1,0))
                      
ggplot() +
  geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitive_lost21))) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","Sensitive Lost"),
                      name = "") +
  labs(title = "Sensitive lands lost: 2011 - 2021",
       subtitle = "As fishnet centroids") +
  mapTheme
```

## 11.4: Landscape Fragmentation

```{r, warning = FALSE, message = FALSE, fig.height = 6, fig.width= 6}
sensitiveRegions <- 
  raster::clump(wetlands21 + forest21) %>%
  rasterToPolygons() %>%
  st_as_sf() %>%
  group_by(clumps) %>% 
  summarize() %>%
    mutate(Acres = as.numeric(st_area(.) * 0.0000229568)) %>%
    filter(Acres > 3954)  %>%
  dplyr::select() %>%
  raster::rasterize(.,emptyRaster) 
sensitiveRegions[sensitiveRegions > 0] <- 1  
names(sensitiveRegions) <- "sensitiveRegions"

dat2 <-
  aggregateRaster(c(sensitiveRegions), dat2) %>%
  dplyr::select(sensitiveRegions) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat2) %>%
  st_sf()

ggplot() +
  geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitiveRegions))) +
  scale_colour_manual(values = palette2,
                      labels=c("Other","Sensitive Regions"),
                      name="") +
  labs(title = "Sensitive regions",
       subtitle = "Continous areas of either wetlands or forests\ngreater than 1 acre") +
  mapTheme
```

## 11.5: Summarize by County
```{r, warning = FALSE, message = FALSE}
county_specific_metrics <- 
  dat2 %>%
  #predict development demand from our model
  mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
  #get a count count of grid cells by county which we can use to calculate rates below
  left_join(st_set_geometry(dat, NULL) %>% group_by(NAME) %>% summarize(count = n())) %>%
  #calculate summary statistics by county
  group_by(NAME) %>%
  summarize(Total_Farmland = sum(farm21) / max(count),
            Total_Forest = sum(forest21) / max(count),
            Total_Wetlands = sum(wetlands21) / max(count),
            Total_Undeveloped = sum(otherUndeveloped21) / max(count),
            Sensitive_Land_Lost = sum(sensitive_lost21) / max(count),
            Sensitive_Regions = sum(sensitiveRegions) / max(count),
            Mean_Development_Demand = mean(Development_Demand)) %>%
  #get population data by county
  left_join(countyPopulation_2031 %>% 
            mutate(Population_Change = county_projection_2031 - county_population_2021,
                   Population_Change_Rate = Population_Change / county_projection_2031) %>%
            dplyr::select(NAME,Population_Change_Rate)) %>%
  na.omit()
```

```{r, warning = FALSE, message = FALSE}
county_specific_metrics %>%
  gather(Variable, Value, -NAME, -geometry) %>%
  mutate(Variable = factor(Variable, levels=c("Population_Change_Rate","Mean_Development_Demand",
                                              "Total_Farmland","Total_Undeveloped","Total_Forest",
                                              "Total_Wetlands","Sensitive_Land_Lost","Sensitive_Regions",
                                              ordered = TRUE))) %>%
  mutate(Planning_Designation = case_when(
    Variable == "Population_Change_Rate" | Variable == "Mean_Development_Demand" ~ "Demand-Side",
    Variable == "Total_Farmland" | Variable == "Total_Undeveloped"               ~ "Suitable",
    TRUE                                                                         ~ "Not Suitable")) %>%
  ggplot(aes(x=Variable, y=Value, fill=Planning_Designation)) +
    geom_bar(stat="identity", position=position_dodge(), colour="black") +
    facet_wrap(~NAME, ncol=5) +
    coord_flip() +
    scale_y_continuous(breaks = seq(.25, 1, by = .25)) +
    geom_vline(xintercept = 2.5) + geom_vline(xintercept = 4.5) +
    scale_fill_manual(values=c("black","red","darkgreen")) +
    labs(title= "County Specific Allocation Metrics", subtitle= "As rates", x="Indicator", y="Rate") +
    plotTheme + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="bottom")
```

# 12: Scenario 1: Allocation
```{r, warning = FALSE, message = FALSE, fig.height= 8, fig.width= 11}
chester <-
  dat2 %>%
    mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
    filter(NAME == "Chester") 

chester_landUse <- rbind(
  filter(chester, forest21 == 1 | wetlands21 == 1 ) %>%
  dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
  filter(chester, developed21 == 1) %>%
  dplyr::select() %>% mutate(Land_Use = "Developed"))

grid.arrange(
ggplot() +
  geom_sf(data=chester, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
  geom_point(data=chester_landUse, aes(x=xyC(chester_landUse)[,1], 
                                        y=xyC(chester_landUse)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Development\nDemand",
                    labels=substr(quintileBreaks(chester,"Development_Demand"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Development Potential, 2031: Chester") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),

ggplot() +
  geom_sf(data=chester, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
  geom_point(data=chester_landUse, aes(x=xyC(chester_landUse)[,1], 
                                        y=xyC(chester_landUse)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Population\nChange",
                    labels=substr(quintileBreaks(chester,"pop_Change"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Projected Population, 2031: Chester") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)
```

# 13: Scenario 2 Transportation
## 13.1: Highway Distance
```{r, warning = FALSE, message = FALSE, results = "hide"}
#dvHighways <-
#  st_read("C:/Users/ferna/OneDrive/Documents/ArcGIS/Projects/CPLN 6750/HW5/dv_roads.geojson") %>%
#  st_transform(st_crs(dvMSA)) %>%
#  st_intersection(dvMSA)

dvTransit <-
  st_read("/Users/luyiiwong/Documents/GitHub/LandUseModeling_HW5/DelawareValley/dv_transit.geojson") %>%
  st_transform(st_crs(dvMSA)) %>%
  st_intersection(dvMSA)
```

```{r warning = FALSE, message= FALSE}
ggplot() +
  geom_point(data=fishnet, 
             aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],colour=layer),size=1.5) +
  geom_sf(data=dvTransit) +
  scale_colour_manual(values = palette2,
                      labels=c("No Change","N`w Development")) +
  labs(title = "New Development and New Transit",
       subtitle = "As fishnet centroids") +
  mapTheme
```
```{r, warning = FALSE, message = FALSE}
emptyRaster <- lc_change
emptyRaster[] <- NA

dvTransit_spdf <- as(dvTransit, "Spatial")
transit_raster <- rasterize(dvTransit, emptyRaster)

#highway_raster <- 
  #as(dvTransit,'Spatial') %>%
  #rasterize(.,emptyRaster)

transit_raster_distance <- distance(transit_raster)
names(transit_raster_distance) <- "distance_transit"

transitPoints <-
  rasterToPoints(transit_raster_distance) %>%
  as.data.frame() %>%
  st_as_sf(coords = c("x", "y"), crs = st_crs(dvMSA_fishnet))

transitPoints_fishnet <- 
  aggregate(transitPoints, dvMSA_fishnet, mean) %>%
  mutate(distance_transit = ifelse(is.na(distance_transit),0,distance_transit))

ggplot() +
  geom_sf(data=dvMSA) +
  geom_point(data=transitPoints_fishnet, aes(x=xyC(transitPoints_fishnet)[,1], 
                                             y=xyC(transitPoints_fishnet)[,2], 
                 colour=factor(ntile(distance_transit,5))),size=1.5) +
  scale_colour_manual(values = palette5,
                      labels=substr(quintileBreaks(transitPoints_fishnet,"distance_transit"),1,8),
                      name="Quintile/nBreaks") +
  geom_sf(data=dvTransit, colour = "red") +
  labs(title = "Distance to transit",
       subtitle = "As fishnet centroids; transit visualized in red") +
  mapTheme
```

## 13.2: Create a new dataset for the model
changing the distance from highway to distance to transit
```{r}
dat.2 <- 
  cbind(fishnet, transitPoints_fishnet, fishnetPopulation, aggregatedRasters)%>%
  dplyr::select(layer, developed, forest, farm, wetlands, otherUndeveloped, water,
                pop_2011, pop_2021, pop_Change, distance_transit,lagDevelopment) %>%
  st_join(studyAreaCounties) %>%
  mutate(developed10 = ifelse(layer == 1 & developed == 1, 0, developed)) %>%
  filter(water == 0) 
```

## 13.3: Create training set
```{r, warning = FALSE, message = FALSE}
set.seed(3456)
trainIndex <- 
  createDataPartition(dat$developed, p = .50,
                                  list = FALSE,
                                  times = 1)
transit.datTrain <- dat.2[ trainIndex,]
transit.datTest  <- dat.2[-trainIndex,]

```

```{r, warning = FALSE, message = FALSE}
# test model with transit
transitModel <- glm(layer ~ wetlands + forest  + farm + otherUndeveloped + lagDevelopment + pop_Change + 
              distance_transit, 
              family="binomial"(link="logit"), data = transit.datTrain) 
summary(transitModel)
#variables are highly significant
#r-square value: 14.8%
```

```{r, warning = FALSE, message = FALSE}
testSetProbs.2 <- 
  data.frame(class = datTest$layer,
             probs = predict(transitModel, transit.datTest, type="response")) 
  
ggplot(testSetProbs.2, aes(probs)) +
  geom_density(aes(fill=class), alpha=0.5) +
  scale_fill_manual(values = palette2,
                    labels=c("No Change","New Development")) +
  labs(title = "Histogram of test set predicted probabilities",
       x="Predicted Probabilities",y="Density") +
  plotTheme
```

## 13.4: Creating new dataframe for pop projection
```{r, warning = FALSE, message = FALSE}
dat.new <-
  aggregateRaster(theRasterList21, dat.2) %>%
  dplyr::select(developed21,forest21,farm21,wetlands21,otherUndeveloped21,water21) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat.2) %>%
  st_sf() %>%
  st_cast("POLYGON")

# considering land fragmentation
dat.new <-
  aggregateRaster(c(sensitiveRegions), dat.new) %>%
  dplyr::select(sensitiveRegions) %>%
  st_set_geometry(NULL) %>%
  bind_cols(.,dat.2) %>%
  st_sf()

```

## 13.5: Land allocation - population projection and development forcast
```{r, warning = FALSE, message = FALSE, fig.height= 8, fig.width= 11}
chester.new <-
  dat.new %>%
    mutate(Development_Demand = predict(transitModel, dat.new, type="response")) %>%
    filter(NAME == "Chester") 

chester_landUse.new <- rbind(
  filter(chester, forest21 == 1 | wetlands21 == 1 ) %>%
  dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
  filter(chester, developed21 == 1) %>%
  dplyr::select() %>% mutate(Land_Use = "Developed"))

grid.arrange(
ggplot() +
  geom_sf(data=chester.new, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
  geom_point(data=chester_landUse.new, aes(x=xyC(chester_landUse.new)[,1], 
                                        y=xyC(chester_landUse.new)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Development\nDemand",
                    labels=substr(quintileBreaks(chester.new,"Development_Demand"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Development Potential, 2031: Chester",
       subtitle = "Additional Transit") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),

ggplot() +
  geom_sf(data=chester.new, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
  geom_point(data=chester_landUse.new, aes(x=xyC(chester_landUse.new)[,1], 
                                        y=xyC(chester_landUse.new)[,2], colour=Land_Use),
                                        shape = 15, size = 2) +
  geom_sf(data=st_intersection(dvHighways,filter(studyAreaCounties, NAME=="Chester")), size=2) +
  scale_fill_manual(values = palette5, name="Population\nChange",
                    labels=substr(quintileBreaks(chester.new,"pop_Change"),1,5)) +
  scale_colour_manual(values = c("black","red")) + 
  labs(title = "Projected Population, 2031: Chester",
       subtitle = "Additional Transit") + mapTheme +
  guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)
```